# knitr::opts_knit$set(verbose=T)
## Global knitr chunk options: cache chunk results so re-knitting is fast
knitr::opts_chunk$set(
cache = TRUE
# dev = c("png")
)
## Render NA values as empty cells in kable tables
options(knitr.kable.NA = "")
This tutorial was created based on the warfarin data set, which was originally published in:
O’Reilly (1968). Studies on coumarin anticoagulant drugs. Initiation of warfarin therapy without a loading dose. Circulation 1968, 38:169-177.
Warfarin is an anticoagulant normally used in the prevention of thrombosis and thromboembolism, the formation of blood clots in the blood vessels and their migration elsewhere in the body, respectively. The data set provides a set of plasma warfarin concentrations and Prothrombin Complex Response in thirty normal subjects after a single loading dose. A single large loading dose of warfarin sodium, 1.5 mg/kg of body weight, was administered orally to all subjects. Measurements were made every 12 or 24 h.
The dataset can be accessed using the following link: https://dataset.lixoft.com/data-set-examples/warfarin-data-set/
Below, we attach some useful background in pharmacokinetics, and the open-source software we will be using in this tutorial. rxode2/nlmixr2 and xgxr
Pharmacokinetics https://pharmacy.ufl.edu/files/2013/01/two-compartment-model.pdf
Exploratory Graphics Initiative (xGx) https://opensource.nibr.com/xgx/
nlmixr2 https://nlmixr2.org/
rxode2 https://nlmixr2.github.io/rxode2/
Interactive Clinical Pharmacology https://www.icp.org.nz/
ModViz POP https://pavanvaddady.shinyapps.io/modvizpop/
Acknowledgements Many of the nlmixr2 estimation parts of this tutorial were heavily inspired by the course codes developed by Rik Schoemaker. These will be presented during the advanced course that will take place in Day 2 of this symposium. The material of the advanced course can be freely accessed from this link: https://blog.nlmixr2.org/courses/page2024/
The PopSim team would also like to thank the rxode2/nlmixr2 team for valuable feedback and support for this course https://blog.nlmixr2.org/
Briefly about the warfarin data set:
## read in the Warfarin PK-only data set ----
## Keep only plasma-concentration records (dvid == "cp") and drop the dvid
## column. (The original code first copied the full `warfarin` data frame into
## PK_dose_data and then immediately overwrote it; the redundant copy is removed.)
PK_dose_data <- subset(warfarin, dvid == "cp", select = c(-dvid))
names(PK_dose_data) <- c("ID", "TIME", "AMT", "DV", "EVID", "WT", "AGE", "SEX")
## add sampling column
### Subjects with at least one observation in the first 24 h after dosing are
### flagged as densely sampled (SPARSE = 0); all others as sparse (SPARSE = 1).
dense_id <- unique(PK_dose_data$ID[PK_dose_data$TIME < 24 & PK_dose_data$TIME > 0])
PK_dose_data$SPARSE <- ifelse(PK_dose_data$ID %in% dense_id, 0, 1)
# Ensure the data set has all the necessary (labelled factor) columns
PK_dose_data <- PK_dose_data %>%
  # Sex: labelled factor built directly from SEX (replaces the two-step
  # as.factor() + relabel), then recoded numerically as 0 = Female, 1 = Male
  mutate(SEXf = factor(SEX, labels = c("Female", "Male"))) %>%
  mutate(SEX = ifelse(SEXf == "Female", 0, 1)) %>%
  # Sampling scheme as a labelled factor (0 = dense, 1 = sparse)
  mutate(SPARSEf = factor(SPARSE, labels = c("Dense sampling", "Sparse sampling"))) %>%
  # Body weight split into tertiles (include.lowest spelled out as TRUE)
  mutate(WTf = cut(WT,
                   breaks = quantile(WT, 0:3 / 3),
                   include.lowest = TRUE,
                   labels = c("Low weight", "Medium weight", "High weight"))) %>%
  # Concentration unit applies to observation rows (EVID == 0) only
  mutate(DV_UNIT = ifelse(EVID == 0, "ng/mL", NA))
# Create a dosing dataset (dose records only, EVID == 1)
Dose_data <- PK_dose_data %>% filter(EVID == 1)
# Create a PK dataset (observation records only, EVID == 0)
PKdata <- PK_dose_data %>% filter(EVID == 0)
# Create a baseline characteristics dataset: one row per subject taken from
# the dose records, keeping only the covariate columns
baseline_char_data <- Dose_data %>%
select(ID, WT, AGE, SEX, SPARSE, SEXf, SPARSEf, WTf) %>%
mutate(ID = as.factor(ID))
# Units and axis labels reused throughout the script ----
# (Assignments converted from `=` to the conventional `<-`.)
time_units_dataset <- "hours"
time_units_plot <- "days"
trtact_label <- "Dose"
x_label <- "Time after dose (hours)"
dose_label <- "Dose (mg)"
# "\U003BC" is the Greek letter mu, used for the microgram unit symbol
conc_units <- paste0("\U003BC", "g/mL")
AUC_units <- paste0("h*", conc_units)
AUC_label <- paste0("AUC (", AUC_units, ")")
conc_label <- paste0("Concentration (", conc_units, ")")
cmax_label <- paste0("Cmax (", conc_units, ")")
warfarin_label <- paste0("Warfarin (", conc_units, ")")
concnorm_label <- paste0("Normalized Concentration (", conc_units, ")/mg")
First, it is always a good idea to get a feel of the data you are going to work with.
An excellent resource that lays out the foundation of exploring PK and PK/PD data is the Exploratory Graphics (xGx) initiative, which can be accessed through this link:
https://opensource.nibr.com/xgx/
A lot of the material presented below has been heavily inspired by xGx, and we strongly recommend spending some time on the xGx website to get a feeling for how exploratory graphics can be used to help us understand our data.
First, let’s construct some baseline characteristics tables.
## Per-sampling-scheme summary of baseline characteristics
summary_data <- baseline_char_data %>%
  group_by(SPARSEf) %>%
  summarise(SEX_proportion = mean(SEX) * 100,  # SEX is 0/1, so mean*100 = % male
            WT_mean = mean(WT),
            AGE_mean = mean(AGE),
            AGE_sd = sd(AGE)) %>%
  # mutate_if() is superseded; across() is the current dplyr idiom
  mutate(across(where(is.numeric), ~ round(.x, 1)))
# Here, we create a nice looking summary table, using the flextable package.
# It is not necessary to understand flextable, but it is good to be aware that the R language is
# very versatile and can help us create any outputs we may be interested in.
# The chain below relabels the columns, auto-fits the layout, and centers the cells.
summary_data %>%
flextable() %>%
set_header_labels(SPARSEf = "Sampling scheme",
SEX_proportion = "Proportion male (%)",
WT_mean = "Mean weight (kg)",
AGE_mean = "Mean age (years)",
AGE_sd = "SD age (years)") %>%
set_table_properties(layout = "autofit", width = .7) %>%
align(align = "center", part = "all") %>%
add_header_lines("Table: Summary of baseline characteristics")
Table: Summary of baseline characteristics | ||||
|---|---|---|---|---|
Sampling scheme | Proportion male (%) | Mean weight (kg) | Mean age (years) | SD age (years) |
Dense sampling | 61.5 | 66.8 | 38.7 | 10.2 |
Sparse sampling | 100.0 | 72.2 | 25.7 | 7.0 |
We can even change how we summarize the data.
## Summary stratified by both sampling scheme and sex
summary_data_2 <- baseline_char_data %>%
  group_by(SPARSEf, SEXf) %>%
  summarise(WT_mean = mean(WT),
            AGE_mean = mean(AGE),
            AGE_sd = sd(AGE)) %>%
  # mutate_if() is superseded; across() is the current dplyr idiom
  mutate(across(where(is.numeric), ~ round(.x, 1)))
# Build the flextable for the two-way (sampling x sex) summary
summary_data_2 %>%
flextable() %>%
set_header_labels(SPARSEf = "Sampling",
SEXf = "Sex",
WT_mean = "Mean weight (kg)",
AGE_mean = "Mean age (years)",
AGE_sd = "SD age (years)") %>%
set_table_properties(layout = "autofit", width = .7) %>%
align(align = "center", part = "all") %>%
add_header_lines("Table: Summary of baseline characteristics")
Table: Summary of baseline characteristics | ||||
|---|---|---|---|---|
Sampling | Sex | Mean weight (kg) | Mean age (years) | SD age (years) |
Dense sampling | Female | 51.3 | 34.4 | 7.9 |
Dense sampling | Male | 76.4 | 41.4 | 11.0 |
Sparse sampling | Male | 72.2 | 25.7 | 7.0 |
We can also create graphical correlation plots
# Pairwise correlation/distribution plot of the baseline covariates (GGally)
GGally::ggpairs(baseline_char_data,
columns = c('WT', 'AGE', "SEXf"),
diag = list(continuous = "barDiag"))
As we can see from the summary table, there are no female subjects in the sparse sampling group. Now let’s try to take a look at how the PK profiles look like.
Here we can see our very first exploratory graph of our data. Points connected with lines represent the measured plasma concentrations over time for each subject. We can already start appreciating the variability in the data (remember, everyone received the same dose!).
Pro tip This is an interactive graph. You can try to zoom in and out to explore different areas of the curve. Pay special attention to the absorption phase!
# Spaghetti plot: one concentration-time line per subject
my_first_plot <- ggplot(PKdata, aes(x=TIME, y = DV, group = ID)) +
geom_point() +
geom_line() +
labs(x = x_label, y = warfarin_label)
# Render as an interactive plotly graph
ggplotly(my_first_plot)
We can also try to split the data into relevant sub populations For example, in the warfarin data set, we have subjects with either dense or sparse sampling Let’s see how these profiles look like.
# Same spaghetti plot, split into panels by sampling scheme
my_second_plot <- my_first_plot + facet_wrap(~SPARSEf)
ggplotly(my_second_plot)
Coding Tip Let’s change the names of the objects we store the plots to something easier (see the folded R code). Let’s try to color by Sex. See how we combined the code from above to create a similar result
# Spaghetti plot coloured by sex and faceted by sampling scheme
g1 <- ggplot(PKdata, aes(x=TIME, y = DV, group = ID, color = SEXf)) +
geom_point() +
geom_line() +
facet_wrap(~SPARSEf) +
labs(x = "Time after dose (hours)", y = conc_label) +
scale_color_discrete(name = "Sex")
# NOTE(review): `center_legend` is not defined anywhere in this file -
# presumably a plotly legend-layout list defined elsewhere; confirm it is
# in scope when this chunk runs.
ggplotly(g1) %>% layout(legend = center_legend)
Optional code
The code that follows is optional as many of these steps have been automated. The calculations in this part are included for students who are interested to know what happens in the back end of ready-to-use functions that we use throughout these exercises.
Let’s create a summary plot - This is what we usually look at after all.
# Mean concentration per time point and sampling scheme, with t-based 95% CI
sumdata <- PKdata %>%
group_by(SPARSEf, TIME) %>%
summarise(mean.conc = mean(DV),
sd.conc = sd(DV),
n.conc = n()) %>%
# Standard error and 95% confidence limits from the t distribution.
# NOTE(review): time points with n.conc == 1 yield NA sd and NA CI bounds -
# confirm this is acceptable for the sparse-sampling panel.
mutate(se.conc = sd.conc / sqrt(n.conc),
lower.ci.conc = mean.conc - qt(1 - (0.05 / 2), n.conc - 1) * se.conc,
upper.ci.conc = mean.conc + qt(1 - (0.05 / 2), n.conc - 1) * se.conc)
# Mean +/- 95% CI over time, linear y-scale, faceted by sampling scheme
g2 <- ggplot(sumdata, aes(x=TIME, y = mean.conc)) +
geom_errorbar(aes(ymin = lower.ci.conc, ymax = upper.ci.conc), width = 5) +
geom_point() +
geom_line() +
facet_wrap(~SPARSEf) +
ylim(0, NA) +
labs(x = x_label, y = warfarin_label, caption = "Points with bars represent the mean with 95% Confidence Intervals")
g2
# Same plot on a semi-log y-scale (warnings from NA CI bounds are suppressed)
g3 <- g2 + scale_y_log10(breaks = c(1,2,4,8,16))
suppressWarnings(print(g3))
Many of the steps under the optional code
section have been automated. For example we can create a similar looking
plot, using just one line of code from the xgxr package. Note that we
only use one line of code to plot the median with 95% confidence
intervals. Also, we can use some convenient commands for manipulating
the names of the x and y axes, as well as change the time unit from
hours to days (see folded code)
# xgx_stat_ci draws the summary statistic with 95% CI in a single layer
g <- ggplot(data = PKdata, aes(x = TIME, y = DV)) +
xgx_stat_ci(conf_level = .95) + # xGx package
facet_wrap(~SPARSEf) +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label)
g
# Semi-log version, restricted to the 1-16 concentration range
g1 <- g + xgx_scale_y_log10(breaks = c(1,2,4,8,16)) +
coord_cartesian(ylim = c(1,16))
suppressWarnings(print(g1))
We can immediately appreciate that the elimination phase in the semi-log scale plot appears linear. This can give us some initial information that warfarin distribution kinetics can be described using a 1-compartment pharmacokinetic model. More on that during the modeling hands-on session.
When we want to get an initial feeling of how some covariates may influence our drug's pharmacokinetics, we can start by working through some simple exploratory analyses. Some common tools in our tool set are coloring and faceting across different variables. This can be done very easily with ggplot and xGx.
# Summary-with-CI plot, stratified (coloured) by sex
g <- ggplot(data = PKdata, aes(x = TIME, y = DV, color = SEXf)) +
xgx_stat_ci(conf_level = .95) + # xGx package
facet_wrap(~SPARSEf) +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label) +
scale_color_discrete(name = "Sex")
g
It looks like there is not a big difference between males and females. But how about weight?
Here, we use the calculated ‘Weight tertile’:
## Weight tertile boundaries (the same cut points used to build WTf above).
## The local variable is named `tertiles` - the original `quartiles` was a
## misnomer, since 0:3/3 gives tertile boundaries, not quartiles.
quantile(PK_dose_data$WT, 0:3 / 3)
tertiles <- quantile(PK_dose_data$WT, 0:3 / 3)
# Create the data frame with weight tertile and weight range
weight_data <- data.frame(
  "Weight tertile" = c("Low", "Medium", "High"),
  "Weight range" = c(
    paste0(tertiles[1], " to ", tertiles[2], " kg"),
    paste0(tertiles[2], " to ", tertiles[3], " kg"),
    paste0(tertiles[3], " to ", tertiles[4], " kg")
  )
)
# Render the tertile table (data.frame converted the column names to
# Weight.tertile / Weight.range, hence the relabelling)
flextable(weight_data) %>%
  set_header_labels(
    Weight.tertile = "Weight tertile",
    Weight.range = "Weight range") %>%
  width(width = 1.5) %>%
  align(align = "center", part = "all")
Weight tertile | Weight range |
|---|---|
Low | 40 to 62 kg |
Medium | 62 to 75.3 kg |
High | 75.3 to 102 kg |
# Summary-with-CI plot, stratified (coloured) by weight tertile
g <- ggplot(data = PKdata, aes(x = TIME, y = DV, color = WTf)) +
xgx_stat_ci(conf_level = .95) +
facet_wrap(~SPARSEf) +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label) +
scale_color_discrete(name = "Weight tertile")
g
It looks like there may be an effect of weight on the PK profiles.
It can also be useful to look at the individual profiles one at a time.
# Individual profiles: one panel per subject, coloured by sex
g1 <- ggplot(PKdata, aes(x=TIME, y = DV, group = ID, color = SEXf)) +
geom_point() +
geom_line() +
facet_wrap(~ID) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label) +
scale_color_discrete(name = "Sex")
g1
We can always perform simple non-compartmental analysis (NCA) analyses with our PK data.
In the code snippet below, we construct some basic calculations for deriving the Area Under the Curve (AUC), as well as the maximum observed concentration (Cmax).
# Perform NCA, for additional plots
NCA <- PKdata %>%
group_by(ID) %>%
filter(!is.na(DV)) %>%
# Per subject: trapezoidal AUC up to the last observation, and observed Cmax
summarize(AUC_last = caTools::trapz(TIME,DV),
Cmax = max(DV)) %>%
mutate(ID = as.factor(ID)) # needed for the join operation below
# Join the NCA metrics back onto the baseline covariates (left_join matches
# on the shared ID column; its "joining by" message is suppressed)
NCAdat <- suppressMessages(left_join(NCA, baseline_char_data))
NCAdat_dense <- NCAdat %>% filter(SPARSEf == "Dense sampling")
Question: Is there anything that we should take into account before exploring the calculated AUC and Cmax? By what would the AUC and Cmax calculations be affected?
Remember the structure of our data! We have both dense and sparse sampling.
Question: Let’s take a look at a comparison between the summary exposure metrics (AUC and Cmax) between the two sampling schemes. What do we see?
# AUC and Cmax boxplots by sampling scheme (all subjects)
g_AUC <- ggplot(data = NCAdat, aes(x = SPARSEf, y = AUC_last)) +
geom_boxplot(aes(group = SPARSEf)) +
ylab(AUC_label) +
xlab("Sampling scheme")
g_Cmax <- ggplot(data = NCAdat, aes(x = SPARSEf, y = Cmax)) +
geom_boxplot(aes(group = SPARSEf)) +
ylab(cmax_label) +
xlab("Sampling scheme")
# Side-by-side composition of the two plots
g_AUC + g_Cmax
NCA metrics based on full PK profiles (dense sampling)
# AUC and Cmax boxplots by sex, dense-sampling subjects only
g_AUC <- ggplot(data = NCAdat_dense, aes(x = SEXf, y = AUC_last)) +
geom_boxplot(aes(group = SEXf)) +
ylab(AUC_label) +
xlab("Sex")
g_Cmax <- ggplot(data = NCAdat_dense, aes(x = SEXf, y = Cmax)) +
geom_boxplot(aes(group = SEXf)) +
ylab(cmax_label) +
xlab("Sex")
g_AUC + g_Cmax
# AUC and Cmax boxplots by weight tertile, dense-sampling subjects only
g_AUC <- ggplot(data = NCAdat_dense, aes(x = WTf, y = AUC_last)) +
geom_boxplot(aes(group = WTf)) +
ylab(AUC_label) +
xlab('') +
theme(axis.text.x = element_text(angle = 15, vjust = 0.5, hjust=1))
g_Cmax <- ggplot(data = NCAdat_dense, aes(x = WTf, y = Cmax)) +
geom_boxplot(aes(group = WTf)) +
ylab(cmax_label) +
xlab('') +
theme(axis.text.x = element_text(angle = 15, vjust = 0.5, hjust=1))
g_AUC + g_Cmax
# AUC and Cmax vs continuous body weight, with a linear-regression smoother
g_AUC <- ggplot(data = NCAdat_dense, aes(x = WT, y = AUC_last)) +
geom_point() +
ylab(AUC_label) +
xlab("Body Weight (kg)") +
geom_smooth(formula = y ~ x, method="lm")
g_Cmax <- ggplot(data = NCAdat_dense, aes(x = WT, y = Cmax)) +
geom_point() +
ylab(cmax_label) +
xlab("Body Weight (kg)") +
geom_smooth(formula = y ~ x, method="lm")
g_AUC + g_Cmax
# Same scatter plots with points additionally coloured by sex
g_AUC_sex <- g_AUC +
aes(color = SEXf) +
scale_color_discrete(name = "Sex")
g_Cmax_sex <- g_Cmax + aes(color = SEXf) +
scale_color_discrete(name = "Sex")
# Arrange side by side with a single shared legend
ggpubr::ggarrange(g_AUC_sex, g_Cmax_sex, common.legend = T)
Let us do our first simulation for warfarin. Remember, the PK profiles for warfarin look like this:
# Recap plot of the observed data: summary statistic with 95% CI
g <- ggplot(data = PKdata, aes(x = TIME, y = DV)) +
xgx_stat_ci(conf_level = .95) + # xGx package
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label)
g
Let’s build a simulation model. First, we set up the system of ordinary differential equations (ODEs). One good way to think about ODEs in PK, is to try and conceptualize the “bathtub model”
Bathtub model concept https://www.tcpharm.org/pdf/10.12793/tcp.2015.23.2.42
## set up the system of differential equations (ODEs)
## One-compartment model with first-order absorption:
##   depot --KA--> central --KE0--> eliminated
## C1 is the predicted concentration (amount in central / volume).
odeKA1 <- "
CL = TV_CL;
V = TV_V;
KA = TV_KA;
KE0 = (CL/V);
d/dt(depot) = -KA * depot;
d/dt(central) = KA * depot - KE0*central;
C1 = central/V;
"
## compile the model
modKA1 <- rxode2(model = odeKA1)
Provide the parameter values In our first example, we (somewhat randomly) set the absorption rate constant (ka) equal to 0.5 1/h, Clearance (CL) equal to 0.0675 L/h and the Volume of distribution (V) equal to 15 L.
## provide the parameter values to be simulated:
Params <- c( TV_KA = 0.5, # 1/h (corresponds to an absorption half-life of log(2)/0.5 ~ 1.4 h)
TV_CL = 0.0675, # L/h
TV_V = 15 # L
)
Subsequently, we need to create an event table. The event table stores the dosing information (how much? how often?) and the sampling information (how often?)
For this first example, we will simulate a single dose for simplicity. (Remember the actual dosing was done as mg/kg and thus it varies!). How much dose (in mg) was given to the subjects? Let’s look at some quantiles.
# How much dose (in mg) was given to the subjects? (Remember the dosing was done as mg/kg and thus it varies)
# The lines starting with "##" below show the console output.
quantile(Dose_data$AMT)
## 0% 25% 50% 75% 100%
## 60.00 92.25 107.50 117.75 153.00
Looks like the median dose was close to 100 mg. Let's use that dose for our simulation.
Now we have everything we need for our first simulation. The
simulation is done using the rxSolve command (click on the
Show button to see the commands).
## create an empty event table that stores both dosing and sampling information :
ev <- eventTable(amount.units = 'mg', time.units = "hr")
## add a dose to the event table:
ev$add.dosing(dose = 100) #mg
## add time points to the event table where concentrations will be simulated; these actions are cumulative
## (here: every 0.1 h from 0 to 120 h)
ev$add.sampling(seq(0, 120, 0.1))
## Then solve the system
##
## The output from rxSolve is a solved RxODE object,
## but by making it a data.frame only the simulated values are kept:
Res <- data.frame(rxSolve(modKA1,Params,ev))
Now, we can plot the simulated outcomes in the compartments
# Simulated amount remaining in the depot (absorption) compartment over time
g_depot <- ggplot(data = Res, aes(x=time, y=depot)) +
geom_line(linewidth=2, colour = "blue") +
labs(x = "Time (hours)", y = 'Simulated Warfarin \namount (mg)', title = 'Simulated amount in depot (dosing) compartment')
# Simulated concentration (C1 = central/V) in the central compartment
g_central <- ggplot(data = Res, aes(x=time, y=C1)) +
geom_line(linewidth=2, colour = "blue") +
labs(x = "Time (hours)", y = 'Simulated Warfarin \nconcentrations (ug/mL)', title = "Simulated concentrations in central compartment")
# Stack the two plots vertically
g_depot / g_central
Let us now overlay the simulated profile with the observed data:
# Overlay the simulated concentration curve (blue) on the observed data summary
g <- ggplot(data = PKdata, aes(x = TIME, y = DV)) +
xgx_stat_ci(conf_level = .95, geom = list("point","errorbar")) +
geom_line(data = Res, aes(x=as.numeric(time), y=C1), linewidth=1, colour = "blue") +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label)
g
What do you think? Is this a good fit for the observed
concentration data?
Not so much… What if we provide better values?
You may recall our initial estimates:
| Parameters | Estimate |
|---|---|
| ka (1/h) | 0.5 |
| CL (L/h) | 0.0675 |
| V (L) | 15 |
Let’s update the parameters to:
| Parameters | Estimate | |
|---|---|---|
| ka (1/h) | 1.4 | |
| CL (L/h) | 0.135 | |
| V (L) | 8 |
## provide the updated parameter values to be simulated
## (The original chunk assigned exactly the same Params vector twice in a row;
## the duplicate assignment has been removed.)
Params <- c( TV_KA = log(2) / 0.5, # 1/h (absorption half-life of 30 minutes)
             TV_CL = 0.135, # L/h
             TV_V = 8 # L
)
## re-solve the model with the updated parameters
Res <- data.frame(rxSolve(modKA1,Params,ev))
# Let us overlay the simulated profile (with the updated parameters) with the observed data:
g <- ggplot(data = PKdata, aes(x = TIME, y = DV)) +
xgx_stat_ci(conf_level = .95, geom = list("point","errorbar")) +
geom_line(data = Res, aes(x=as.numeric(time), y=C1), linewidth=1, colour = "blue") +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label)
g
This is much better! - By varying the parameters, we can get a better description of the data we have. This is what the estimation algorithms do for us. By varying all parameters multiple times, we can end up with having a model with parameters that describes our data. The goodness of statistical fit can be assessed with the “Objective Function Value”, as well as a variety of goodness-of-fit plots that we have at our disposal. More on that is described in the modeling sections.
We can also create multiple simulation scenarios, such as multiple dosing, simulations with between subject variability and simulations with random residual error.
Here we create a simulation of five 100 mg doses, given once daily (every 24 h). Notice that we overlay the observations even though they are from a single dose study. This is helpful for making sure that the model captures the data well following the first dose, and importantly, to help us appreciate the accumulation of the drug over time.
# Event table for multiple dosing: five 100 mg doses, once daily (q24h)
ev_multiple <- eventTable(amount.units = 'mg', time.units = "hr") %>%
add.dosing(dose=100,
nbr.doses=5,
dosing.interval=24)
# Solve the model for the multiple-dose regimen
Res <- data.frame(rxSolve(modKA1,Params,ev_multiple))
# Let us overlay the simulated profile with the observed (single-dose) data:
g <- ggplot(data = PKdata, aes(x = TIME, y = DV)) +
xgx_stat_ci(conf_level = .95, geom = list("point","errorbar")) +
geom_line(data = Res, aes(x=as.numeric(time), y=C1), linewidth=1, colour = "blue") +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label)
g
Population models can go one step further and simulate multiple potential concentration time courses. We know that every patient is different, which means that variability in the pharmacokinetic profiles may come from various intrinsic or extrinsic factors. With population models, we can simulate different profiles, but assuming that there is some randomness around the structural parameters of the structural model.
These concepts might be somewhat confusing at first, but let’s try to use the same model as before, but now simulate 30 patients receiving 100 mg warfarin once daily. First, we create a simulation where only the clearance (CL) varies. If you look at the code you can see that there are parameters for variability on ka and V as well, but the variance is set at 0.0001, which is ~ 0.
# Simulation with variability in clearance only.
# The IIV variances for V and KA are set to ~0 so that effectively only CL varies.
# (A stray trailing backtick after the closing parenthesis - a syntax error -
# has been removed.)
omega <- lotri(eta.CL ~ 0.4^2,
               eta.V ~ 0.0001, # Very small value
               eta.KA ~ 0.0001)
In the second example, we create a simulation where all parameters are allowed to vary.
# Simulation with variability in all parameters
# (each eta has a variance of 0.4^2, i.e. ~40% CV on the log scale)
omega <- lotri(eta.CL ~ 0.4^2,
eta.V ~ 0.4^2,
eta.KA ~ 0.4^2)
Question: What do you observe? Can you describe the differences between these simulated profiles?
## One-compartment model with inter-individual variability: each parameter is
## the typical value times exp(eta), and the predicted concentration carries a
## proportional residual error term (prop.err).
odeKA1_IIV <- "
CL = TV_CL * exp(eta.CL);
V = TV_V * exp(eta.V);
KA = TV_KA * exp(eta.KA);
KE0 = (CL/V);
d/dt(depot) = -KA * depot;
d/dt(central) = KA * depot - KE0*central;
C1 = central/V * (1+prop.err);
"
## compile the model
modKA1_IIV <- rxode2(model = odeKA1_IIV)
## provide the parameter values to be simulated with proportional error
## (prop.err is set to 0 here, so no residual noise is added in these simulations)
Params_error <- c( TV_KA = log(2) / 0.5, # 1/h (absorption half-life of 30 minutes)
TV_CL = 0.135, # L/h
TV_V = 8, # L
prop.err = 0
)
# Simulation with variability in clearance only
omega <- lotri(eta.CL ~ 0.4^2,
               eta.V ~ 0.0001, # Very small value
               eta.KA ~ 0.0001)
# Solve 30 simulated subjects. Use the compiled model object (modKA1_IIV)
# rather than the model string (odeKA1_IIV), matching how modKA1 is used above
# and avoiding recompilation of the ODE system.
Res_1 <- data.frame(rxSolve(modKA1_IIV, Params_error, ev_multiple, omega=omega, nSub=30))
# Simulation with variability in all parameters
omega <- lotri(eta.CL ~ 0.4^2,
               eta.V ~ 0.4^2,
               eta.KA ~ 0.4^2)
# Solve 30 simulated subjects; use the compiled model object (modKA1_IIV),
# not the model string, consistent with the other rxSolve calls.
Res_2 <- data.frame(rxSolve(modKA1_IIV, Params_error, ev_multiple, omega=omega, nSub=30))
# One line per simulated subject (sim.id): variability on CL only
g_1 <- ggplot(data = Res_1, aes(x=as.numeric(time), y=C1, group=sim.id)) +
geom_line(linewidth=1, colour = "blue") +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(title = "Simulation with variability on CL", y=conc_label)
# One line per simulated subject: variability on all parameters
g_2 <- ggplot(data = Res_2, aes(x=as.numeric(time), y=C1, group=sim.id)) +
geom_line(linewidth=1, colour = "blue") +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(title = "Simulation with variability on all parameters", y=conc_label)
# Show the two scenarios side by side
g_1 + g_2
Now let us do our first parameter estimation (click on the
Show button to see the commands).
For the following example, we are going to define a one-compartment model, with linear absorption and linear elimination kinetics and fit the model to the warfarin data set. Notice that we will only estimate inter-individual variability (IIV) for clearance (eta.cl). Notice that we have ‘commented-out’ the code for estimating IIV in Ka and V. This is because we plan to explore the influence of estimating IIVs in Ka and V later in the course. For our residual error model, we choose to start by estimating a proportional error only.
# ------------------------------------------------------
# Run 001. One comp, lin. abs and elim. Eta on CL only - proportional residual error
# ------------------------------------------------------
# nlmixr2 model function: the ini({...}) block declares initial estimates
# (fixed effects on the log scale, IIV variances, residual-error magnitudes)
# and the model({...}) block defines the ODE system and the error model.
One.comp.KA.ODE <- function() {
ini({
# Where initial conditions/variables are specified
lka <- log(1.15) #log ka (1/h)
lcl <- log(0.135) #log Cl (L/h)
lv <- log(8) #log V (L)
prop.err <- 0.15 #proportional error (SD/mean)
# add.err <- 0.6 #additive error (mg/L)
# eta.ka ~ 0.5
eta.cl ~ 0.1
# eta.v ~ 0.1
})
model({
# Where the model is specified
# IIV is applied to CL only; the commented-out etas for ka and V are
# explored later in the course
cl <- exp(lcl + eta.cl)
v <- exp(lv)
ka <- exp(lka)
## ODE example
d/dt(depot) = -ka * depot # depot is defined as amount (i.e. the unit is mg)
d/dt(central) = ka * depot - (cl/v) * central
## Concentration is calculated
cp = central/v
## And is assumed to follow proportional error
cp ~ prop(prop.err) # + add(add.err)
})
}
## Clean up rxode2 temporary/compiled files before estimation
rxClean()
## Run the estimation only when requested; otherwise load previously saved
## results. isTRUE() replaces the fragile `== T` comparison (T is reassignable
## and `==` would fail on a NULL flag).
## NOTE(review): `run.estimation` must be defined earlier in the document.
if (isTRUE(run.estimation)) {
  ## estimate parameters using nlmixr/FOCEI:
  run001 <- nlmixr(One.comp.KA.ODE, # the model definition
                   PK_dose_data,    # the data set
                   est = "focei",   # the estimation algorithm (FOCEi)
                   # FOCEi options:
                   foceiControl(print = 20)) # only print every 20th estimation step
  # and saved for future use or reference:
  save(run001, file = "run001.Rdata")
} else {
  load(file = "run001.Rdata")
}
We can now take a look at the estimated fixed effect parameters. Remember that fixed effects (theta’s) are on the log scale, and therefore, the numbers in the ‘Est.’ column are the log-values, while the back-transformed (exponentiated) values are in the ‘Back-transformed (95% CI)’ column. IIV is presented as %CV in the ‘BSV’ column (between-subject variability).
flextable(run001$parFixed)
Parameter | Est. | SE | %RSE | Back-transformed(95%CI) | BSV(CV%) | Shrink(SD)% |
|---|---|---|---|---|---|---|
log ka (1/h) | -0.31 | 0.158 | 51.1 | 0.734 (0.538, 1) |
|
|
log Cl (L/h) | -2.02 | 0.0467 | 2.31 | 0.132 (0.121, 0.145) | 24.3 | 3.45% |
log V (L) | 2.03 | 0.0523 | 2.57 | 7.64 (6.9, 8.46) |
|
|
proportional error (SD/mean) | 0.269 | 0.269 |
|
|
We’ve now estimated parameter estimates but we don’t know how well
these parameters describe the data. Let’s simulate concentrations based
on the estimated parameters and compare to the observed data. We
simulate using the same function as in Hands-on 1 (2.2 Understanding PK
simulations)
Res <- data.frame(rxSolve(modKA1,Params,ev)), but we
update the parameter estimates (Params).
See how in the folded Code (click on the
Show button).
#################################################################################
## Hands-on assignment:
## Try to grab the estimated parameters from above, simulate and see how the
## model fits the data
#################################################################################
# Back-transform the log-scale fixed effects to the natural scale
Estimated_parameters <- exp(run001$fixef)
# provide the parameter values to be simulated - These come from the model we just estimated above.
#
Params_est <- c(TV_KA = Estimated_parameters[['lka']], # 1/h (estimated absorption rate constant)
TV_CL = Estimated_parameters[['lcl']], # L/h
TV_V = Estimated_parameters[['lv']] # L
)
rxClean()
# Simulate the typical-subject profile with the estimated parameters
Res_est <- data.frame(rxSolve(modKA1,
Params_est,
ev))
Let us overlay the simulated profile (Res_est) with the
observed data (PKdata):
# Let us overlay the simulated profile with the observed data:
# Observed data shown as mean +/- 95% CI (xgx); simulated profile as blue line
g_lin <- ggplot(data = PKdata, aes(x = TIME, y = DV)) +
xgx_stat_ci(conf_level = .95, geom = list("point","errorbar")) +
geom_line(data = Res_est, aes(x=as.numeric(time), y=C1), linewidth=1, colour = "blue") +
ylim(0, NA) +
xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot) +
labs(y=conc_label, subtitle = 'Linear y scale')
# Zoom in on the first day
g_lin_2 <- g_lin + coord_cartesian(xlim = c(0,24)) + labs(y=conc_label, subtitle = 'Linear y scale \nZoom in on the first day')
# log scale
g_log <- g_lin + xgx_scale_y_log10(breaks = c(1,2,4,8,16)) + coord_cartesian(ylim = c(1,20)) + labs(y=conc_label, subtitle = 'Log y scale')
# Zoom in on the first day
g_log_2 <- g_log + coord_cartesian(xlim = c(0,24)) + labs(y=conc_label, subtitle = 'Log y scale \nZoom in on the first day')
# 2x2 panel: linear/log scale crossed with full range/first day
(g_lin + g_lin_2) / (g_log + g_log_2)
This fit looks pretty good! The estimation was a success and now we have a model that describes our data well - at least on a population level.
Question: Can we do something to improve the fit?
It looks like the absorption phase is not captured adequately. There is a constant model over-prediction that we need to think about. Can you see that there is a delay on the appearance of concentrations in plasma as compared to what the model predicts? We will try to capture this with some model development exercises below.
The strength of population modeling however, is that we are able to describe all individuals in our dataset - despite their intrinsic and extrinsic differences. What do we mean by that?
Let’s see how the individual predictions look like!
plot(augPred(run001))
Now let’s look a bit closer at the individual and population predictions. The individual predictions vary, because we made the calculated assumption that the clearance may be different between them, and then we were able to estimate this random difference. But the population predictions are clearly different between subjects as well. They might not be as variable as the individual predictions, but still… Can you guess why?
Remember the dosing! How were our patients dosed? Did they all receive the same amount?
augpred_dat <- augPred(run001)
# -----------------------------------------------------------
# Somewhat technical code below. Understanding it right now is out of scope for this course.
# augPred() does not carry the original ID forward as expected. As a workaround,
# build a lookup table mapping the original ID to augPred's internal id.
# (Top-level assignment switched from "=" to the idiomatic "<-".)
ID_frame <- data.frame(ID = unique(run001$ID),
                       id = unique(augpred_dat$id))
augpred_cov_dat <- augpred_dat %>%
  left_join(ID_frame) %>%        # joins on the shared "id" column
  mutate(DV = values,            # rename to the column names used elsewhere
         TIME = time) %>%
  select(-id) %>%
  left_join(baseline_char_data)  # presumably joined on ID -- verify the key
# Individual Predictions: one line per subject plus the observed points
g_ipred <- ggplot(subset(augpred_cov_dat, ind=="Individual"), aes(x=TIME, y=DV, group=ID)) +
  geom_line() +
  geom_point(data = subset(augpred_cov_dat, ind=="Observed")) +
  xgx_scale_x_time_units(units_dataset = time_units_dataset,
                         units_plot = time_units_plot) +
  labs(y=conc_label, subtitle = 'Individual predictions', caption = "Lines are individual predictions \nPoints are observations")
# Population Predictions: no etas, so subjects differ only through study design
g_pred <- ggplot(subset(augpred_cov_dat, ind=="Population"), aes(x=TIME, y=DV, group=ID)) +
  geom_line() +
  geom_point(data = subset(augpred_cov_dat, ind=="Observed")) +
  xgx_scale_x_time_units(units_dataset = time_units_dataset,
                         units_plot = time_units_plot) +
  labs(y=conc_label, subtitle = 'Population predictions', caption = "Lines are population predictions \nPoints are observations")
g_ipred + g_pred
A somewhat more interesting plot may be when we investigate the
model predictions when splitting the graph between the sampling
schemes.
Question: What do you observe? How can we use our model to fill in the gaps ?
(g_ipred + facet_wrap(~SPARSEf))
Now, let’s look at a table with all individual parameter
estimates, technically referred to as the Empirical Bayes Estimates
(EBEs)
## Extract EBEs from the model object:
## The fit contains one row per observation; taking the first row per subject
## yields each subject's individual (empirical Bayes) parameter values.
EBEs <- as.data.table(run001) %>%
group_by(ID) %>%
slice(1) %>%
mutate(KA = ka,
CL = cl,
V = v) %>%
select(ID, KA, CL, V)
# combine to create a table with all individual parameters
# (presumably joined on ID -- verify the shared key columns)
table_dat <- left_join(baseline_char_data, EBEs) %>%
  mutate_if(is.numeric, round, 2)
table_dat %>%
  dplyr::select(ID, WT, AGE, SEXf, SPARSEf, KA, CL, V) %>%
  flextable() %>%
  set_header_labels(SPARSEf = "Sampling",
                    WT = "Weight (kg)",
                    AGE = "Age (years)",
                    SEXf = 'Sex',
                    KA = "Ind. Ka (1/h)",
                    CL = "Ind. CL (L/h)",
                    V = "Ind. V (L)") %>%
  set_table_properties(layout = "autofit", width = .9) %>%
  align(align = "center", part = "all") %>%
  # Footnote marker "a" on the three parameter columns (KA, CL, V)
  footnote(i = 1, j = 6:8,
           value = as_paragraph(
             c("Individual parameter estimates")), # fixed typo: "paramter"
           ref_symbols = c("a"),
           part = "header") %>%
  valign(valign = "bottom", part = "header") %>%
  autofit()
ID | Weight (kg) | Age (years) | Sex | Sampling | Ind. Ka (1/h)a | Ind. CL (L/h)a | Ind. V (L)a |
|---|---|---|---|---|---|---|---|
1 | 66.7 | 50 | Male | Dense sampling | 0.73 | 0.27 | 7.64 |
2 | 66.7 | 50 | Male | Sparse sampling | 0.73 | 0.12 | 7.64 |
3 | 66.7 | 31 | Male | Dense sampling | 0.73 | 0.14 | 7.64 |
4 | 80.0 | 40 | Male | Dense sampling | 0.73 | 0.12 | 7.64 |
5 | 40.0 | 46 | Female | Dense sampling | 0.73 | 0.08 | 7.64 |
6 | 75.3 | 43 | Male | Dense sampling | 0.73 | 0.15 | 7.64 |
7 | 60.0 | 36 | Female | Dense sampling | 0.73 | 0.21 | 7.64 |
8 | 90.0 | 41 | Male | Dense sampling | 0.73 | 0.16 | 7.64 |
9 | 50.0 | 27 | Female | Dense sampling | 0.73 | 0.12 | 7.64 |
10 | 70.0 | 28 | Male | Sparse sampling | 0.73 | 0.12 | 7.64 |
12 | 82.0 | 31 | Male | Dense sampling | 0.73 | 0.13 | 7.64 |
13 | 75.3 | 32 | Male | Dense sampling | 0.73 | 0.14 | 7.64 |
14 | 75.3 | 63 | Male | Dense sampling | 0.73 | 0.16 | 7.64 |
15 | 50.0 | 36 | Female | Dense sampling | 0.73 | 0.17 | 7.64 |
16 | 56.7 | 27 | Female | Dense sampling | 0.73 | 0.11 | 7.64 |
17 | 58.0 | 22 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 |
18 | 78.0 | 28 | Male | Sparse sampling | 0.73 | 0.14 | 7.64 |
19 | 74.7 | 31 | Male | Sparse sampling | 0.73 | 0.18 | 7.64 |
20 | 63.7 | 22 | Male | Sparse sampling | 0.73 | 0.17 | 7.64 |
21 | 59.0 | 22 | Male | Sparse sampling | 0.73 | 0.10 | 7.64 |
22 | 62.0 | 27 | Male | Sparse sampling | 0.73 | 0.14 | 7.64 |
23 | 58.0 | 22 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 |
24 | 73.3 | 22 | Male | Sparse sampling | 0.73 | 0.13 | 7.64 |
25 | 76.7 | 35 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 |
26 | 74.7 | 23 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 |
27 | 80.0 | 23 | Male | Sparse sampling | 0.73 | 0.12 | 7.64 |
28 | 80.0 | 22 | Male | Sparse sampling | 0.73 | 0.12 | 7.64 |
29 | 80.0 | 22 | Male | Sparse sampling | 0.73 | 0.14 | 7.64 |
30 | 102.0 | 22 | Male | Sparse sampling | 0.73 | 0.16 | 7.64 |
31 | 70.0 | 23 | Male | Sparse sampling | 0.73 | 0.13 | 7.64 |
32 | 83.3 | 24 | Male | Sparse sampling | 0.73 | 0.13 | 7.64 |
33 | 62.0 | 21 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 |
aIndividual parameter estimates |
Question: Notice that every subject has a different clearance (CL). But everyone has the same absorption rate constant (Ka), and Volume of distribution (V). Why is that?
Remember that we estimated IIV only! for clearance! This means that we assumed that everyone has the same Ka and V, but different Clearance values. Is this the best model we can arrive at? Maybe not! We will explore more models during the modeling session.
Can you calculate the individual \(t_½\)? Can you create a histogram of the \(t_½\)?
# -------------------------------
# exercise
# Can you calculate the individual t 1/2?
# can you create a histogram of the t 1/2?
# -------------------------------
# Answer
# Elimination rate constant Ke0 = CL/V; half-life t1/2 = ln(2)/Ke0
table_dat <- table_dat %>%
mutate(Ke0 = CL/V, # 1/hours
t_half = log(2)/Ke0) %>% # hours
mutate_if(is.numeric, round, 3)
# Display the per-subject parameters plus the derived Ke0 and half-life
table_dat %>%
  dplyr::select(ID, WT, SEXf, SPARSEf, KA, CL, V, Ke0, t_half) %>%
  flextable() %>%
  set_header_labels(SPARSEf = "Sampling",
                    WT = "Weight (kg)",
                    SEXf = 'Sex',
                    KA = "Ind. Ka (1/h)",
                    CL = "Ind. CL (L/h)",
                    V = "Ind. V (L)",
                    Ke0 = "Ind. Ke0 (1/h)",
                    t_half = "Ind. T1/2 (h)") %>%
  set_table_properties(layout = "autofit", width = 1) %>%
  align(align = "center", part = "all") %>%
  # Footnote marker "a" on columns 6:9 (CL, V, Ke0, t_half)
  footnote(i = 1, j = 6:9,
           value = as_paragraph(
             c("Individual parameter estimates")), # fixed typo: "paramter"
           ref_symbols = c("a"),
           part = "header") %>%
  valign(valign = "bottom", part = "header") %>%
  autofit()
ID | Weight (kg) | Sex | Sampling | Ind. Ka (1/h) | Ind. CL (L/h)a | Ind. V (L)a | Ind. Ke0 (1/h)a | Ind. T1/2 (h)a |
|---|---|---|---|---|---|---|---|---|
1 | 66.7 | Male | Dense sampling | 0.73 | 0.27 | 7.64 | 0.035 | 19.613 |
2 | 66.7 | Male | Sparse sampling | 0.73 | 0.12 | 7.64 | 0.016 | 44.130 |
3 | 66.7 | Male | Dense sampling | 0.73 | 0.14 | 7.64 | 0.018 | 37.826 |
4 | 80.0 | Male | Dense sampling | 0.73 | 0.12 | 7.64 | 0.016 | 44.130 |
5 | 40.0 | Female | Dense sampling | 0.73 | 0.08 | 7.64 | 0.010 | 66.196 |
6 | 75.3 | Male | Dense sampling | 0.73 | 0.15 | 7.64 | 0.020 | 35.304 |
7 | 60.0 | Female | Dense sampling | 0.73 | 0.21 | 7.64 | 0.027 | 25.217 |
8 | 90.0 | Male | Dense sampling | 0.73 | 0.16 | 7.64 | 0.021 | 33.098 |
9 | 50.0 | Female | Dense sampling | 0.73 | 0.12 | 7.64 | 0.016 | 44.130 |
10 | 70.0 | Male | Sparse sampling | 0.73 | 0.12 | 7.64 | 0.016 | 44.130 |
12 | 82.0 | Male | Dense sampling | 0.73 | 0.13 | 7.64 | 0.017 | 40.736 |
13 | 75.3 | Male | Dense sampling | 0.73 | 0.14 | 7.64 | 0.018 | 37.826 |
14 | 75.3 | Male | Dense sampling | 0.73 | 0.16 | 7.64 | 0.021 | 33.098 |
15 | 50.0 | Female | Dense sampling | 0.73 | 0.17 | 7.64 | 0.022 | 31.151 |
16 | 56.7 | Female | Dense sampling | 0.73 | 0.11 | 7.64 | 0.014 | 48.142 |
17 | 58.0 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 | 0.014 | 48.142 |
18 | 78.0 | Male | Sparse sampling | 0.73 | 0.14 | 7.64 | 0.018 | 37.826 |
19 | 74.7 | Male | Sparse sampling | 0.73 | 0.18 | 7.64 | 0.024 | 29.420 |
20 | 63.7 | Male | Sparse sampling | 0.73 | 0.17 | 7.64 | 0.022 | 31.151 |
21 | 59.0 | Male | Sparse sampling | 0.73 | 0.10 | 7.64 | 0.013 | 52.956 |
22 | 62.0 | Male | Sparse sampling | 0.73 | 0.14 | 7.64 | 0.018 | 37.826 |
23 | 58.0 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 | 0.014 | 48.142 |
24 | 73.3 | Male | Sparse sampling | 0.73 | 0.13 | 7.64 | 0.017 | 40.736 |
25 | 76.7 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 | 0.014 | 48.142 |
26 | 74.7 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 | 0.014 | 48.142 |
27 | 80.0 | Male | Sparse sampling | 0.73 | 0.12 | 7.64 | 0.016 | 44.130 |
28 | 80.0 | Male | Sparse sampling | 0.73 | 0.12 | 7.64 | 0.016 | 44.130 |
29 | 80.0 | Male | Sparse sampling | 0.73 | 0.14 | 7.64 | 0.018 | 37.826 |
30 | 102.0 | Male | Sparse sampling | 0.73 | 0.16 | 7.64 | 0.021 | 33.098 |
31 | 70.0 | Male | Sparse sampling | 0.73 | 0.13 | 7.64 | 0.017 | 40.736 |
32 | 83.3 | Male | Sparse sampling | 0.73 | 0.13 | 7.64 | 0.017 | 40.736 |
33 | 62.0 | Male | Sparse sampling | 0.73 | 0.11 | 7.64 | 0.014 | 48.142 |
aIndividual parameter estimates |
# Histogram of the individual elimination half-lives computed above
ggplot(table_dat) +
  geom_histogram(aes(x = t_half), bins = 12, fill = "gray") +
  labs(x = "Individual half life (h)")
With the histogram, we can certainly appreciate the variability in the calculated \(t_½\)
Remember that when we were trying to calculate NCA based exposure metrics (AUC and Cavg), we had to think about the sampling scheme, and how that may affect our NCA results. Because we have some subjects with full profiles and some with more sparse sampling, we had to compromise and decide that we can only use the full profile data. Otherwise we risked introducing bias due to the differences in the sampling scheme!
But look at these simulated profiles above. They are almost complete! This is because we were able to use the model to construct individual profiles. Oftentimes, these model-predicted individual profiles are used to fill in the gaps that may arise due to irregularities in the sampling scheme, missing PK samples, etc.
Let’s try to redo the NCA calculations. Now, we will use the model-based individual predicted profiles. This means that we can use the entire information from the population we are working with and gain some more power and insights as compared to standard NCA
# Perform NCA, for additional plots
# Use the individual model-predicted profiles (not raw observations) as input
augpred_dat_ipred <- augpred_cov_dat %>% filter(ind == "Individual")
NCA_model <- augpred_dat_ipred %>%
group_by(ID) %>%
filter(!is.na(DV)) %>%
summarize(AUC_last = caTools::trapz(TIME,DV), # trapezoidal AUC to the last prediction
Cmax = max(DV)) %>%
mutate(ID = as.factor(ID)) # needed for the join operation below
# suppressMessages() hides the "Joining, by = ..." message; presumably joins on ID
NCAdat_model <- suppressMessages(left_join(NCA_model, baseline_char_data))
NCA metrics based on individual model predicted PK profiles
# Model-based NCA metrics (AUC and Cmax) by sex
g_AUC <- ggplot(data = NCAdat_model, aes(x = SEXf, y = AUC_last)) +
geom_boxplot(aes(group = SEXf)) +
ylab(AUC_label) +
xlab("Sex")
g_Cmax <- ggplot(data = NCAdat_model, aes(x = SEXf, y = Cmax)) +
geom_boxplot(aes(group = SEXf)) +
ylab(cmax_label) +
xlab("Sex")
g_AUC + g_Cmax
# NOTE(review): g_AUC_1/g_Cmax_1 rebuild the same sex boxplots as g_AUC/g_Cmax
# above; the duplication keeps this chunk self-contained.
g_AUC_1 <- ggplot(data = NCAdat_model, aes(x = SEXf, y = AUC_last)) +
geom_boxplot(aes(group = SEXf)) +
ylab(AUC_label) +
xlab("Sex")
g_Cmax_1 <- ggplot(data = NCAdat_model, aes(x = SEXf, y = Cmax)) +
geom_boxplot(aes(group = SEXf)) +
ylab(cmax_label) +
xlab("Sex")
# AUC and Cmax vs body weight with a linear-regression smoother
g_AUC <- ggplot(data = NCAdat_model, aes(x = WT, y = AUC_last)) +
geom_point() +
ylab(AUC_label) +
xlab("Body Weight (kg)") +
geom_smooth(formula = y ~ x, method="lm")
g_Cmax <- ggplot(data = NCAdat_model, aes(x = WT, y = Cmax)) +
geom_point() +
ylab(cmax_label) +
xlab("Body Weight (kg)") +
geom_smooth(formula = y ~ x, method="lm")
# Stratifying by sex
g_AUC_sex <- g_AUC +
aes(color = SEXf) +
scale_color_discrete(name = "Sex")
g_Cmax_sex <- g_Cmax + aes(color = SEXf) +
scale_color_discrete(name = "Sex")
# Three rows: by sex, vs weight, vs weight coloured by sex
(g_AUC_1 + g_Cmax_1) / (g_AUC + g_Cmax) / (g_AUC_sex + g_Cmax_sex)
Congratulations! You just performed your first modeling analysis and model application!
There are many model diagnostics that modelers have in their toolkit to evaluate the robustness of the models they work with. Here, we show some of the most often used ones. An excellent reference for model diagnostics can be found in this link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5321813/#psp412161-bib-0008
## We can use the new xpose package for which xpose.nlmixr provides the link
## the nlmixr object can be transformed into an xpose object to allow diagnostics with the new xpose package
## the link between nlmixr and xpose is provided by the xpose.nlmixr package
## only xpose_data_nlmixr is from xpose.nlmixr, all further commands are from the xpose package
xpdb.1s <- xpose_data_nlmixr(run001)
# ## Individual fits can be generated using xpose:
# Tell xpose which column holds the population predictions
xpdb.1s <- set_var_types(xpdb.1s,pred = 'PRED')
ind_plots(xpdb.1s, caption = NULL, ncol = 4, nrow = 4)
# ## ...use the arrows in the plot window to examine the earlier curves
Dependent variable (DV; plasma concentrations in this case) vs model predictions
The data points should be scattered around the identity line (but not necessarily evenly). Trends may suggest a modification of structural model, residual error model or interindividual variability model.
## dv vs pred plot:
# Observations vs population predictions; points should scatter around identity
dv_vs_pred(xpdb.1s,
caption = NULL) #if not NULL provides the directory where this was run
Dependent variable (DV; plasma concentrations in this case) vs individual model predictions The data points should be scattered around the identity line (but not necessarily evenly). Trends may suggest a modification of structural model, residual error model or interindividual variability model.
## dv vs ipred plot:
# Observations vs individual predictions; points should scatter around identity
dv_vs_ipred(xpdb.1s, #the xpose object
caption = NULL) #if not NULL provides the directory where this was run
Conditional weighted residuals (CWRES) over time. Data points should be scattered around the horizontal zero-line (more or less evenly). Trends may suggest a modification of structural model, residual error model, or interindividual variability model.
## CWRES vs time:
# Conditional weighted residuals should scatter evenly around zero
res_vs_idv(xpdb.1s, #the xpose object
res = "CWRES", #examine conditional weighted residuals
idv = "TIME", #as a function of time
caption = NULL) #if not NULL provides the directory where this was run
Individual weighted residuals (IWRES) over time. The data points should be scattered around the horizontal zero-line (more or less evenly). Trends may suggest a modification of structural model, residual error model, or interindividual variability model.
#Absolute values of individual weighted residual vs time
absval_res_vs_idv(xpdb.1s, #the xpose object
res = "IWRES", #examine absolute values (absval) of individual weighted residuals
idv = "TIME", #as a function of time
caption = NULL) #if not NULL provides the directory where this was run
## ...this plot shows that there are some issues at the earlier time points that an updated absorption model may fix
This plot shows that there are some issues at the earlier time points. This may be something that we can fix using some other absorption model (refer to Model development exercises)
Visual predictive checks (VPCs) are the golden standard for model evaluation in PK/PD modeling. The underlying principle for creating VPC plots is that simulating from a model should create similar predictions to what we observed.
When we simulate many times from the same model, the stochastic (or random) nature of the model comes into effect. This means that if we e.g. simulate a single patient profile 10 times, then we will end up with 10 different (but similar looking) simulated profiles. This is because each simulated profile will have different (random) eta values sampled from the eta-distribution. If we simulate 10 patient profiles 10 times, then of course we will end up with 100 simulated profiles.
To qualify a model and argue that it describes the data well, what we want to see is that it captures the data central tendency and variability well. One way to do this, is first, summarize our data in percentiles (10, 50, and 90th percentiles) and then plot them out. Then we simulate with the model (usually 1000 times) and summarize our simulated data in percentiles (10, 50, and 90th percentiles) as well. When we overlay the observed percentiles on the simulated percentiles, we end up with the VPC!
Overall, the way to read a VPC, is: A model that can create simulated percentiles that are in good agreement with the observed percentiles, is a model we can qualify and argue that describes the data well.
#######################################################################
###                                                                 ###
###                  Visual predictive checks                       ###
###                                                                 ###
#######################################################################
## because the data set uses nominal time points, it is nice to have the bins
## surround these time points so that each time point falls in a bin
bin_mids <- sort(unique(PKdata$TIME))
bin_edges <- bin_mids - c(0, diff(bin_mids) / 2) # edges sit halfway between successive nominal times
## Simulate 100 replicates of the study design from the fitted model
SimVPC <- vpcSim(
  run001, # the nlmixr object
  n = 100)
# The VPC tooling expects upper-case ID/TIME/DV column names
setnames(SimVPC, c("id", "time", "sim"), c("ID", "TIME", "DV"))
vpc_vpc(sim = SimVPC, obs = PKdata, bins = bin_edges,
        # show = list(obs_dv = TRUE, ...) # layer visibility can be customised here
        xlab = x_label,
        ylab = conc_label,
        title = "VPC for first order absorption PopPK model with linear y axis") +
  # Caption rewritten: the original read "...observed median are predicted
  # percentiles", which was garbled.
  labs(caption = "The dashed lines are the observed 10th and 90th percentiles.
\nThe solid black line is the observed median.
\nBlue and light blue ribbons are the 95% confidence intervals of the corresponding predicted percentiles."
  )
#
# wrapper <- function(x, ...) {
#   paste(strwrap(x, ...), collapse = "\n")
# }
# Helper: build one VPC panel. The four panels below share the same data,
# bins, axis labels and y-limits and differ only in which layers are shown
# and in the caption, so the common call is factored out here.
make_vpc <- function(show, caption) {
  vpc_vpc(sim = SimVPC, obs = PKdata, bins = bin_edges,
          show = show,
          xlab = x_label,
          ylab = conc_label) +
    coord_cartesian(ylim = c(0, 21)) +
    labs(caption = caption)
}
my_caption1 <-
"Open circles: observed data.
Black line: Median for the observed data. \n\n\n"
vpc1 <- make_vpc(list(obs_dv = TRUE,
                      obs_median = TRUE,
                      sim_median = FALSE,
                      sim_median_ci = FALSE,
                      obs_ci = FALSE,
                      pi = FALSE,
                      pi_ci = FALSE),
                 my_caption1)
my_caption2 <-
"Open circles: observed data.
Black line: Median for the observed data.
Dashed lines: 90% CIs for the observed data. \n\n"
vpc2 <- make_vpc(list(obs_dv = TRUE,
                      obs_median = TRUE,
                      sim_median = FALSE,
                      sim_median_ci = FALSE,
                      obs_ci = TRUE,
                      pi = FALSE,
                      pi_ci = FALSE),
                 my_caption2)
my_caption3 <-
"Open circles: observed data.
Black line: Median for the observed data.
Dashed lines: 90% CIs for the observed data.
Dark blue ribbon: 95% confidence interval of the simulated median. \n"
vpc3 <- make_vpc(list(obs_dv = TRUE,
                      obs_median = TRUE,
                      sim_median = FALSE,
                      sim_median_ci = TRUE,
                      obs_ci = TRUE,
                      pi = FALSE,
                      pi_ci = FALSE),
                 my_caption3)
my_caption4 <-
"Open circles: observed data.
Black line: Median for the observed data.
Dashed lines: 90% CIs for the observed data.
Dark blue ribbon: 95% confidence interval of the simulated median.
Light blue ribbon: 95% confidence interval of the simulated 10 and 90th percentile."
vpc4 <- make_vpc(list(obs_dv = TRUE,
                      obs_median = TRUE,
                      sim_median = FALSE,
                      sim_median_ci = TRUE,
                      obs_ci = TRUE,
                      pi = FALSE,
                      pi_ci = TRUE),
                 my_caption4)
# Display the panels in order of increasing detail
vpc1
vpc2
vpc3
vpc4
Model development is typically a step-wise exercise. Some things that we are looking to explore during model development are:
In the following steps, we will add additional IIVs in our model and explore the influence of estimating them by looking at a statistical measure that is called the ‘Objective Function Value’.
Various random effect models are run in this section.
Unfold Code to see the models (click on the
Show button).
# ------------------------------------------------------
# Model 2. Add Eta on Ka
# ------------------------------------------------------
if (isTRUE(run.estimation)) { # TRUE instead of the reassignable shorthand T
  # Notice that we can simply add an IIV on Ka with the addEta command
  mod002 <- One.comp.KA.ODE %>% addEta("ka")
  run002 <- nlmixr(mod002,        # the model definition
                   PK_dose_data,  # the data set
                   est = "focei", # the estimation algorithm (FOCEi)
                   # FOCEi options:
                   foceiControl(print = 20)) # only print every 20th estimation step
  # and saved for future use or reference:
  # BUG FIX: was "~run002.Rdata" -- the stray "~" made the saved file name
  # disagree with the load() in the else branch.
  save(run002, file = "run002.Rdata")
} else {
  load(file = "run002.Rdata")
}
# ------------------------------------------------------
# Model 3. Add Eta on V1
# ------------------------------------------------------
if (isTRUE(run.estimation)) { # TRUE instead of the reassignable shorthand T
  mod003 <- mod002 %>% addEta("v") # model 2 plus IIV on volume
  run003 <- nlmixr(mod003,        # the model definition
                   PK_dose_data,  # the data set
                   est = "focei", # the estimation algorithm (FOCEi)
                   foceiControl(print = 20)) # only print every 20th estimation step
  # and saved for future use or reference:
  save(run003, file = "run003.Rdata")
} else {
  load(file = "run003.Rdata")
}
# ----------------------------------------------------------------
# Model 4. Model based on run003 - Additive residual error only
# ----------------------------------------------------------------
if (isTRUE(run.estimation)) { # TRUE instead of the reassignable shorthand T
  mod004 <- mod003 %>% addResErr("addSd") # swap the residual model to additive
  run004 <- nlmixr(mod004,        # the model definition
                   PK_dose_data,  # the data set
                   est = "focei", # the estimation algorithm (FOCEi)
                   foceiControl(print = 20)) # only print every 20th estimation step
  # and saved for future use or reference:
  save(run004, file = "run004.Rdata")
} else {
  load(file = "run004.Rdata")
}
# ----------------------------------------------------------------
# Model 5. Model based on run003 - Combined additive and proportional residual error
# ----------------------------------------------------------------
if (isTRUE(run.estimation)) { # TRUE instead of the reassignable shorthand T
  mod005 <- mod003 %>% addResErr(c("addSd", "propSd")) # combined error model
  run005 <- nlmixr(mod005,        # the model definition
                   PK_dose_data,  # the data set
                   est = "focei", # the estimation algorithm (FOCEi)
                   foceiControl(print = 20)) # only print every 20th estimation step
  # and saved for future use or reference:
  save(run005, file = "run005.Rdata")
} else {
  load(file = "run005.Rdata")
}
# ------------------------------------------------------
# Run 006. Two compartment model
# ------------------------------------------------------
# Two-compartment model with first-order absorption, written with
# micro-constants (K20, K23, K32) derived from CL, V1, Q and V2.
Two.comp.KA.ODE <- function() {
ini({
# Where initial conditions/variables are specified
lKA <- log(1.15) #log ka (1/h)
lCL <- log(0.135) #log Cl (L/h)
lV1 <- log(8) #log V (L)
lQ <- log(0.5) #log Q (L/h)
lV2 <- log(10) #log V2 (L/h)
# IIV on absorption, clearance and central volume
eta.KA ~ 0.5
eta.CL ~ 0.1
eta.V1 ~ 0.1
prop.err <- 0.15 #proportional error (SD/mean)
add.err <- 0.6 #additive error (mg/L)
})
model({
# Where the model is specified
CL <- exp(lCL + eta.CL)
V1 <- exp(lV1 + eta.V1)
KA <- exp(lKA + eta.KA)
Q <- exp(lQ)
V2 <- exp(lV2)
## ODE example
# Micro-constants: elimination and inter-compartmental transfer rates
K20 = CL/V1
K23 = Q/V1
K32 = Q/V2
d/dt(depot) = -KA * depot
d/dt(central) = KA * depot - K23*central + K32*periph - K20 * central
d/dt(periph) = K23*central - K32*periph
## Concentration is calculated
cp = central/V1
## And is assumed to follow combined proportional and additive error
cp ~ prop(prop.err) + add(add.err)
})
}
## Clear the rxode2 cache before compiling the new model
rxClean()
## estimate parameters using nlmixr/FOCEI:
mod006 <- Two.comp.KA.ODE
if (isTRUE(run.estimation)) { # TRUE instead of the reassignable shorthand T
  run006 <- nlmixr(mod006,        # the model definition
                   PK_dose_data,  # the data set
                   est = "focei", # the estimation algorithm (FOCEi)
                   # FOCEi options:
                   foceiControl(print = 20)) # only print every 20th estimation step
  # and saved for future use or reference:
  save(run006, file = "run006.Rdata")
} else {
  load(file = "run006.Rdata")
}
# ------------------------------------------------------
# Model 7. Run006 + Add Eta on V2 and Q
# ------------------------------------------------------
# Add between-subject variability (etas) on the peripheral parameters Q and V2.
mod007 <- mod006 %>% addEta(c("Q", "V2"))
# isTRUE() instead of `== T`: T is reassignable and the comparison can
# misbehave for NA input.
if (isTRUE(run.estimation)) {
  run007 <- nlmixr(mod007,        #the model definition
                   PK_dose_data,  #the data set
                   est = "focei", #the estimation algorithm (FOCEi)
                   foceiControl(print = 20)) #only print every 20th estimation step
  # and saved for future use or reference:
  save(run007, file = "run007.Rdata")
} else {
  # Skip estimation and restore the previously saved fit.
  load(file = "run007.Rdata")
}
Various absorption models are run in this section. Unfold the code to see the
models (click on the Show button).
#################################################################################
## ##
## Update the model with a single transit compartment ##
## ##
#################################################################################
## One compartment transit model
## One-compartment disposition with a single transit compartment between
## depot and central to give a more gradual (delayed) absorption profile.
## The same rate constant ktr drives both depot -> transit and
## transit -> central transfer. IIV etas on ktr, cl and v; combined
## residual error on the concentration.
One.comp.transit <- function() {
ini({
# Initial estimates: fixed effects on the log scale, etas as variances
lktr <- log(1.15) #log k transit (/h)
lcl <- log(0.135) #log Cl (L/hr)
lv <- log(8) #log V (L)
prop.err <- 0.15 #proportional error (SD/mean)
add.err <- 0.6 #additive error (mg/L)
eta.ktr ~ 0.5
eta.cl ~ 0.1
eta.v ~ 0.1
})
model({
# Individual parameters: exp(log fixed effect + eta) -> log-normal IIV
cl <- exp(lcl + eta.cl)
v <- exp(lv + eta.v)
ktr <- exp(lktr + eta.ktr)
# RxODE-style differential equations are supported
# (statement order of the ODEs does not matter to the solver)
d/dt(depot) = -ktr * depot
d/dt(central) = ktr * trans - (cl/v) * central
d/dt(trans) = ktr * depot - ktr * trans
## Concentration is calculated
cp = central/v
# And is assumed to follow proportional and additive error
cp ~ prop(prop.err) + add(add.err)
})
}
# ------------------------------------------------------
# Model 8. Run005 + Add transit absorption compartment
# ------------------------------------------------------
mod008 <- One.comp.transit
# isTRUE() instead of `== T`: T is reassignable and the comparison can
# misbehave for NA input.
if (isTRUE(run.estimation)) {
  run008 <- nlmixr(mod008,        #the model definition
                   PK_dose_data,  #the data set
                   est = "focei", #the estimation algorithm (FOCEi)
                   foceiControl(print = 5)) #only print every 5th estimation step
  # and saved for future use or reference:
  save(run008, file = "run008.Rdata")
} else {
  # Skip estimation and restore the previously saved fit.
  load(file = "run008.Rdata")
}
We can construct tables to compare the OFV of our different runs. Question: Based on these results, which model would you choose to bring forward? Why?
# Collect the fitted runs once so each summary column can be computed
# programmatically instead of repeating the same expression eight times.
run.list <- list(run001, run002, run003, run004, run005,
                 run006, run007, run008)
# Objective function value (OFV) of each run.
ofv.all <- vapply(run.list, function(fit) fit$OBJF, numeric(1))
OFV.table <- data.frame(
  Run = paste0("run00", seq_along(run.list)),
  Description = c("One comp, lin. abs and elim. Eta on CL only - prop. res. error",
                  "Run001 + Eta on KA",
                  "Run002 + Eta on V",
                  "Run003 + add. res. error",
                  "Run003 + comb. res.error",
                  "Run005 + Two comp. dist.",
                  "Run006 + Eta on Q, V2",
                  "Run005 + Add 1 transit abs. comp."),
  OFV = ofv.all,
  # Change in OFV relative to the base model, run001.
  deltaOFV = ofv.all - ofv.all[1],
  # Parameter count: fixed effects (incl. residual-error parameters)
  # plus the number of etas (rows of the omega matrix).
  nParam = vapply(run.list,
                  function(fit) length(fit$fixef) + nrow(fit$omega),
                  numeric(1))
)
OFV.table %>%
  flextable() %>%
  set_table_properties(layout = "autofit", width = .8) %>%
  align(align = "center", part = "header") %>%
  autofit()
Run | Description | OFV | deltaOFV | nParam |
|---|---|---|---|---|
run001 | One comp, lin. abs and elim. Eta on CL only - prop. res. error | 515.3198 | 0.000000 | 5 |
run002 | Run001 + Eta on KA | 507.7280 | -7.591782 | 6 |
run003 | Run002 + Eta on V | 475.5681 | -39.751629 | 7 |
run004 | Run003 + add. res. error | 439.5365 | -75.783302 | 7 |
run005 | Run003 + comb. res.error | 423.6025 | -91.717255 | 8 |
run006 | Run005 + Two comp. dist. | 406.4720 | -108.847780 | 10 |
run007 | Run006 + Eta on Q, V2 | 406.4796 | -108.840133 | 12 |
run008 | Run005 + Add 1 transit abs. comp. | 320.2339 | -195.085820 | 8 |
So far, we have seen how to explore our data, how to estimate model parameters for simple PK models and how to extend our PK model, either with adding additional layers of IIV, or expanding the model complexity to better describe our data.
For our dataset, we have seen that there is a clear influence of weight on our PK. This is not an unexpected behaviour, even when we adjust our dose by body weight (remember, the doses were given in mg/kg). Here, we will do a very brief introduction to covariate model development. We will try to estimate body weight effects on clearance and the volume of distribution. We have selected to work with run008, as this was the most promising model based on the model evaluations so far.
# Compute the same summary columns for the two covariate runs,
# mirroring how OFV.table was built for runs 001-008.
run.list_upd <- list(run009, run010)
ofv.upd <- vapply(run.list_upd, function(fit) fit$OBJF, numeric(1))
OFV.table_upd <- data.frame(
  Run = c("run009", "run010"),
  Description = c("Run008 + weight on CL",
                  "Run009 + weight on CL and V1"),
  OFV = ofv.upd,
  # deltaOFV stays referenced to the base model run001 so the values are
  # comparable with the existing OFV.table rows.
  deltaOFV = ofv.upd - run001$OBJF,
  # Fixed effects (incl. residual-error parameters) + number of etas.
  nParam = vapply(run.list_upd,
                  function(fit) length(fit$fixef) + nrow(fit$omega),
                  numeric(1))
)
# Append the covariate runs to the comparison table and render it.
OFV.table %>%
  bind_rows(OFV.table_upd) %>%
  flextable() %>%
  set_table_properties(layout = "autofit", width = .8) %>%
  align(align = "center", part = "header") %>%
  autofit()
Run | Description | OFV | deltaOFV | nParam |
|---|---|---|---|---|
run001 | One comp, lin. abs and elim. Eta on CL only - prop. res. error | 515.3198 | 0.000000 | 5 |
run002 | Run001 + Eta on KA | 507.7280 | -7.591782 | 6 |
run003 | Run002 + Eta on V | 475.5681 | -39.751629 | 7 |
run004 | Run003 + add. res. error | 439.5365 | -75.783302 | 7 |
run005 | Run003 + comb. res.error | 423.6025 | -91.717255 | 8 |
run006 | Run005 + Two comp. dist. | 406.4720 | -108.847780 | 10 |
run007 | Run006 + Eta on Q, V2 | 406.4796 | -108.840133 | 12 |
run008 | Run005 + Add 1 transit abs. comp. | 320.2339 | -195.085820 | 8 |
run009 | Run008 + weight on CL | 315.9728 | -199.346923 | 9 |
run010 | Run009 + weight on CL and V1 | 289.7555 | -225.564246 | 10 |
Here, we will compare the VPCs for our runs 001, 008 and 010. What do you observe? Which model captures our data better?
bin_mids <- sort(unique(PKdata$TIME))
# Place each bin edge halfway between consecutive nominal time points;
# the first edge coincides with the earliest observation time.
bin_edges <- bin_mids - c(0, diff(bin_mids) / 2)
# Simulate n replicates of the study design from a fitted run and rename
# the simulation columns to the upper-case names the VPC tools expect.
# (setnames() modifies the data.table by reference and returns it.)
sim_for_vpc <- function(fit, n = 100) {
  sim <- vpcSim(fit, #the nlmixr object
                n = n)
  setnames(sim, c("id", "time", "sim"), c("ID", "TIME", "DV"))
  sim
}
SimVPC_001 <- sim_for_vpc(run001)
SimVPC_008 <- sim_for_vpc(run008)
SimVPC_010 <- sim_for_vpc(run010)
# Shared VPC display settings: observed points, observed median and its CI,
# simulated median CI, and the prediction-interval CI ribbons.
vpc_show <- list(obs_dv = TRUE,
                 obs_median = TRUE,
                 sim_median = FALSE,
                 sim_median_ci = TRUE,
                 obs_ci = TRUE,
                 pi = FALSE,
                 pi_ci = TRUE)
# Build one VPC panel with the shared settings; only the simulated data,
# title and caption differ between the three runs.
plot_vpc <- function(sim, title, caption) {
  vpc_vpc(sim = sim, obs = PKdata, bins = bin_edges,
          show = vpc_show,
          xlab = x_label,     #x-axis label
          ylab = conc_label,  #y-axis label
          title = title) +
    labs(caption = caption)
}
VPC001 <- plot_vpc(SimVPC_001,
                   "First order absorption PopPK model\n",
                   "Run001")
VPC008 <- plot_vpc(SimVPC_008,
                   "Transit compartment absorption\nPopPK model\n",
                   "Run008")
VPC010 <- plot_vpc(SimVPC_010,
                   "Transit compartment absorption PopPK\nmodel with body weight effect on CL and V1",
                   "Run010")
# Arrange the three panels (plus an empty cell) in a 2x2 grid.
(VPC001 + VPC008) / (VPC010 + plot_spacer())
Zooming in on the absorption phase. We also changed the y-scale to logarithmic.
# Apply the same log-scale zoom on the absorption phase (first 30 h,
# concentrations 3-20 mg/L) to each VPC panel.
zoom_abs <- function(p) {
  p + xgx_scale_y_log10() + coord_cartesian(ylim = c(3, 20), xlim = c(0, 30))
}
(zoom_abs(VPC001) + zoom_abs(VPC008)) /
  (zoom_abs(VPC010) + plot_spacer())
End of PopSim 2024 workshop Exercises - Introduction to
Population Pharmacokinetic modeling.